import csv
import os
import time

import requests
from lxml import etree

# Request configuration shared by the scraping code below.

# Browser-like User-Agent sent with the listing-page requests.
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/138.0.0.0 Safari/537.36"
    ),
}

# Base URL of the listing; paging is driven by the `start` query parameter.
url = "https://movie.douban.com/top250"

# Query-string parameters; `start` is overwritten for every page requested.
param = {"start": 0}

# SOCKS5 proxy mapping. NOTE(review): no request in the visible code passes
# this as `proxies=`, so it currently has no effect.
proxys = {"https": "socks5h://117.161.170.163:9006"}

# Scrape the listing page by page, collecting the title, score and rating
# count of every entry and saving each poster image under IMG_DIR.
#
# XPath expressions used below:
#   titles         //ol[@class="grid_view"]//span[@class="title"][1]
#   scores         //ol[@class="grid_view"]//span[@class="rating_num"][1]
#   rating counts  //div[@class="bd"]//span[4]
#   poster URLs    //ol//img/@src
#
# Alternate proxy kept from the original notes (unused): socks5h:222.59.173.105:44165

PAGE_SIZE = 25          # step of the `start` query parameter between pages
TOTAL_MOVIES = 250      # the list is a "top 250", so starts run 0, 25, ..., 225
REQUEST_TIMEOUT = 10    # seconds; without a timeout a stalled socket hangs forever
IMG_DIR = "img"

names = []
rates = []
persons = []

# The poster files are opened with mode "wb", which does not create missing
# parent directories, so make sure the target directory exists up front.
os.makedirs(IMG_DIR, exist_ok=True)

for i in range(0, TOTAL_MOVIES, PAGE_SIZE):
    param['start'] = i
    resp = requests.get(url, headers=header, params=param,
                        timeout=REQUEST_TIMEOUT)
    if resp.status_code == 200:
        e = etree.HTML(resp.text)
        name = e.xpath('//ol[@class="grid_view"]//span[@class="title"][1]/text()')
        rat = e.xpath('//ol[@class="grid_view"]//span[@class="rating_num"][1]/text()')
        ratnum = e.xpath('//div[@class="bd"]//span[4]/text()')
        imgs = e.xpath('//ol//img/@src')

        # Append this page's results. (Slice-assigning to [-1:] would replace
        # the last element already collected, dropping one entry per page.)
        names.extend(name)
        rates.extend(rat)
        persons.extend(ratnum)

        for title, img in zip(name, imgs):
            try:
                img_res = requests.get(img, headers=header,
                                       timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # A single failed poster should not abort the whole scrape.
                print(f"poster download failed for {title}: {exc}")
                continue
            if img_res.status_code != 200:
                # Do not write an error page to disk as if it were a JPEG.
                print(f"poster download failed for {title}: "
                      f"HTTP {img_res.status_code}")
                continue
            # Path separators in a title would point outside IMG_DIR.
            safe_title = title.replace('/', '_').replace('\\', '_')
            filename = os.path.join(IMG_DIR, f"{safe_title}.jpg")
            with open(filename, "wb") as f:
                f.write(img_res.content)
    else:
        print(f"skipping page start={i}: HTTP {resp.status_code}")

    # Pause between pages regardless of outcome to stay polite to the server.
    time.sleep(1)

# Echo every collected row. zip() stops at the shortest list, so a mismatch
# between the three lists silently truncates; the lengths printed at the end
# make such a mismatch visible.
for n, r, p in zip(names, rates, persons):
    print(f"{n}--->{r}--->{p}")

# Persist the rows to movie.csv. newline='' lets the csv module control line
# endings itself.
with open('movie.csv', 'w', encoding='utf-8', newline='') as f:
    # Column names: movie title, score, number of ratings.
    fn = ['电影名称', '评分', '评分人数']
    writer = csv.DictWriter(f, fieldnames=fn)
    # Emit the header row so the columns in the file are labelled.
    writer.writeheader()
    writer.writerows(
        dict(zip(fn, row)) for row in zip(names, rates, persons)
    )

# Diagnostic: the three counts should be equal if every entry was parsed.
print(len(names))
print(len(rates))
print(len(persons))
